{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install llama-index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install llama-index-llms-openai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"sk-\"  # replace with your OpenAI API key\n",
    "\n",
    "import nest_asyncio\n",
    "nest_asyncio.apply()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# imports from the llama-index sub-question query engine example\n",
    "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n",
    "from llama_index.core.tools import QueryEngineTool, ToolMetadata\n",
    "from llama_index.core.query_engine import SubQuestionQueryEngine\n",
    "from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler\n",
    "from llama_index.core import Settings\n",
    "\n",
    "# imports from the llama-index multi-step query engine example\n",
    "from llama_index.llms.openai import OpenAI\n",
    "from IPython.display import Markdown, display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# LLM (gpt-3.5)\n",
    "gpt35 = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
    "\n",
    "# LLM (gpt-4)\n",
    "gpt4 = OpenAI(temperature=0, model=\"gpt-4\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import os\n",
    "from llama_index.core import SimpleDirectoryReader\n",
    "import urllib.request\n",
    "\n",
    "os.makedirs('data/paul_graham/', exist_ok=True)\n",
    "\n",
    "# Download the file\n",
    "url = 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt'\n",
    "file_path = 'data/paul_graham/paul_graham_essay.txt'\n",
    "urllib.request.urlretrieve(url, file_path)\n",
    "\n",
    "# load documents\n",
    "pg_essay = SimpleDirectoryReader(\"./data/paul_graham/\").load_data()\n"
   ]
  },
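  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added sanity check (not in the original notebook): confirm the essay was\n",
    "# downloaded and loaded; load_data() returns a list of Document objects.\n",
    "print(f\"Loaded {len(pg_essay)} document(s)\")"
   ]
  },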
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Using the LlamaDebugHandler to print the trace of the sub questions\n",
    "# captured by the SUB_QUESTION callback event type\n",
    "llama_debug = LlamaDebugHandler(print_trace_on_end=True)\n",
    "callback_manager = CallbackManager([llama_debug])\n",
    "\n",
    "Settings.callback_manager = callback_manager"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# build an index over the loaded essay; reused by the multi-step engine below\n",
    "index = VectorStoreIndex.from_documents(pg_essay)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# build index and query engine\n",
    "vector_query_engine = VectorStoreIndex.from_documents(\n",
    "    pg_essay,\n",
    "    use_async=True,\n",
    ").as_query_engine()"
   ]
  },
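  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added example (not in the original notebook): exercise the base query engine\n",
    "# on a sample question before wrapping it in the SubQuestionQueryEngine below.\n",
    "print(vector_query_engine.query(\"What did the author do growing up?\"))"
   ]
  },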
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# setup base query engine as tool\n",
    "query_engine_tools = [\n",
    "    QueryEngineTool(\n",
    "        query_engine=vector_query_engine,\n",
    "        metadata=ToolMetadata(\n",
    "            name=\"pg_essay\",\n",
    "            description=\"Paul Graham essay on What I Worked On\",\n",
    "        ),\n",
    "    ),\n",
    "]\n",
    "\n",
    "query_engine = SubQuestionQueryEngine.from_defaults(\n",
    "    query_engine_tools=query_engine_tools,\n",
    "    use_async=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "llama_debug.flush_event_logs()  # clear earlier events so only this query's sub-questions are captured below\n",
    "\n",
    "response = query_engine.query(\n",
    "    \"Summarize the essay, and discuss the author's life before and after the essay was written.\"\n",
    ")  # pass verbose=False to SubQuestionQueryEngine.from_defaults to silence the printed sub-question answers"
   ]
  },
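  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added example (not in the original notebook): the object returned by query()\n",
    "# holds the synthesized answer, separate from the verbose trace printed above.\n",
    "print(response)"
   ]
  },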
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core.callbacks import CBEventType\n",
    "\n",
    "event_pairs = llama_debug.get_event_pairs(CBEventType.SUB_QUESTION)\n",
    "\n",
    "# each SUB_QUESTION event carries a SubQuestionAnswerPair in its payload;\n",
    "# read the question text from its attributes rather than parsing the repr\n",
    "sub_questions = []\n",
    "for start_event, end_event in event_pairs:\n",
    "    qa_pair = end_event.payload[\"sub_question\"]\n",
    "    sub_questions.append(qa_pair.sub_q.sub_question.strip())"
   ]
  },
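  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added example (not in the original notebook): list the sub-questions\n",
    "# recovered from the debug events before re-asking them below.\n",
    "for i, q in enumerate(sub_questions):\n",
    "    print(f\"{i}: {q}\")"
   ]
  },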
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core.indices.query.query_transform.base import (\n",
    "    StepDecomposeQueryTransform,\n",
    ")\n",
    "\n",
    "# gpt-4\n",
    "step_decompose_transform = StepDecomposeQueryTransform(llm=gpt4, verbose=True)\n",
    "\n",
    "# gpt-3.5\n",
    "step_decompose_transform_gpt3 = StepDecomposeQueryTransform(llm=gpt35, verbose=True)\n",
    "\n",
    "index_summary = \"Used to answer questions about the author\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core.query_engine import MultiStepQueryEngine\n",
    "\n",
    "# reuse the index built earlier; gpt-4 answers each decomposed step\n",
    "mqe = index.as_query_engine(llm=gpt4)\n",
    "\n",
    "multi_query_engine = MultiStepQueryEngine(\n",
    "    query_engine=mqe,\n",
    "    query_transform=step_decompose_transform,\n",
    "    index_summary=index_summary,\n",
    ")\n",
    "\n",
    "# run each extracted sub-question through the multi-step engine\n",
    "for sub_q in sub_questions:\n",
    "    print(\"query: \", sub_q)\n",
    "    response = multi_query_engine.query(sub_q)\n",
    "    # print(response)"
   ]
  },
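  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added example (not in the original notebook): render the multi-step answer to\n",
    "# the last sub-question using the Markdown/display imports from above.\n",
    "display(Markdown(f\"<b>{response}</b>\"))"
   ]
  },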
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
